## ----------------------------------------------------------------------------------------
#| echo: false
#| message: false
#| warning: false

library(here)
library(tidyverse)
library(sjmisc)
library(vtable)
library(psych) 
library(GPArotation)
library(mirt)
library(lavaan)
library(semTable)
library(modelsummary)
library(marginaleffects)
library(kableExtra)
library(patchwork)
library(ggrepel)
library(xtable)
library(conflicted)



## ----------------------------------------------------------------------------------------
#| echo: false

# We import the data and we elminate the two populism variables. 
# We have added these variables to the dataset based on the analysis here, i.e. after the fact.
# Since we create these variable in this paper we need to remove them for the analysis.


df_integrated <- readRDS(here("data/poppa_integrated.rds")) %>%
  select(-c(populism_cfa,
            populism_cfa_rescaled))


# We make wave a factor. wave 1 is the reference category. 

df_integrated$wave <- forcats::as_factor(df_integrated$wave) 


# We make the party family variable a factor and Christian democracy the reference category. 

df_integrated$family_label <- forcats::as_factor(df_integrated$family_label)

df_integrated$family_label <- forcats::fct_relevel(df_integrated$family_label, "Christian Democratic")


df_integrated <- df_integrated %>%
  mutate(regions = forcats::fct_collapse(country,
                                         West = c("Austria",
                                                  "Belgium",
                                                  "Denmark",
                                                  "Finland",
                                                  "France",
                                                  "Germany",
                                                  "Iceland",
                                                  "Ireland",
                                                  "Luxembourg",
                                                  "Netherlands",
                                                  "Norway",
                                                  "Sweden",
                                                  "Switzerland",
                                                  "United Kingdom"),
                                         South = c("Cyprus",
                                                   "Greece",
                                                   "Italy",
                                                   "Malta",
                                                   "Portugal",
                                                   "Spain"),
                                         East  = c("Bulgaria",
                                                   "Croatia",
                                                   "Czech Republic",
                                                   "Estonia",
                                                   "Hungary",
                                                   "Latvia",
                                                   "Lithuania",
                                                   "Poland",
                                                   "Romania",
                                                   "Slovakia",
                                                   "Slovenia"))) 


# We reverse the lifestyle item for use below. 
# This transforms the lifestyle variable as lower values more liberal and higher values more traditional.
# We create a new variable for them with `_rev`. 

df_integrated <- df_integrated %>%
  mutate(lifestyle_rev = (10 - lifestyle)) 


# We create an authoritarian item by combining lifestyle_rev and laworder.
# This makes higher values as more authoritarian.

df_integrated <- df_integrated %>%
  dplyr::rowwise() %>%
  mutate(authoritarian = mean(c(lifestyle_rev, laworder))) %>%
  ungroup()


# We mean center nativism, authoritarian and lrecon for the interaction effects. 

df_integrated <- df_integrated %>% 
  mutate(nativism_centered = nativism - mean(nativism, na.rm = TRUE),
         authoritarian_centered = authoritarian - mean(authoritarian, na.rm = TRUE),
         lrecon_centered = lrecon - mean(lrecon, na.rm = TRUE) 
  )


# We create two vectors for graphing. It is easier to do now so we know where to find them if we move things around later.

populist_items <- c(
  "Anti-Elitism" = "antielitism",
  "People Centrism" = "peoplecentrism",
  "Manichean" = "manichean",
  "General Will" = "generalwill",
  "Indivisible" = "indivisible"
)


# We create a vector to order the items for later.

populism_items_order <- c("Anti-Elitism",
                          "People Centrism",
                          "General Will",
                          "Manichean",
                          "Indivisible") 



# We also create a custom colour palette for plots for later. 

custom_palette <- c(
  "#ffbb78",  # Light Orange (Light)
  "#7f7f7f",  # Gray (Dark)
  "#f7b6d2",  # Light Pink (Light)
  "#98df8a",  # Light Green (Light)
  "#c49c94",  # Light Brown (Light)
  "#17becf",  # Cyan (Medium)
  "#bcbd22",  # Yellow-Green (Medium)
  "#2ca02c",  # Green (Dark)
  "#1f77b4",  # Blue (Dark)
  "#e377c2",  # Pink (Light)
  "#ff7f0e",  # Orange (Medium)  "#d62728",  # Red (Dark)
  "#9467bd",  # Purple (Dark)
  "#008080"   # Dark Teal 
)




## ----------------------------------------------------------------------------------------
#| echo: false
#| output: asis
#| tbl-cap: Descriptive Statistics for Wave 2 and Wave 1
#| label: tbl-summary-statistics


# We create a descriptive table for both waves.
# we use the vtable package.
# We create a new dataframe for the vtable to be used for the descriptive table. 
# We do this just to keep things separate since we do some special manipulations for the table.


df_integrated_table <- df_integrated


# We run the CFA for the descriptive table. 
# The explanation and the full process for creating the CFA populism variable can be found below.


cfa_cov_3_table <- '

populism_cfa_cov_3_table =~ manichean + generalwill + peoplecentrism + antielitism + indivisible

generalwill ~~ indivisible
peoplecentrism ~~ antielitism
manichean ~~  antielitism

'

# Fit the model to the data.

fit_cov_3_table <- cfa(model = cfa_cov_3_table, data = df_integrated_table)


# We create a function to attach the regression scores to the dataframe.
# We do this since we attach factor scores several times and we want to be sure that there is no overlap or confusion between dataframes.


attach_factor_scores <- function(df, fit_model) {
  # Step 1: Extract case indices
  idx <- lavInspect(fit_model, "case.idx")
  
  # Step 2: Extract factor scores
  fscores <- lavPredict(fit_model, method = "regression")
  
  # Step 3: Loop over each factor score and attach to the dataframe
  for (fs in colnames(fscores)) {
    # Attach the factor scores to the dataframe using the case indices
    df[idx, fs] <- fscores[, fs]
  }
  
  # Return the modified dataframe
  return(df)
}

# We attach the regression scores to the dataset with the function we just created.

df_integrated_table <- attach_factor_scores(df_integrated_table, fit_cov_3_table)


# We rescale the CFA populism variable.

df_integrated_table$populism_cfa_resc_table <- scales::rescale(df_integrated_table$populism_cfa_cov_3_table, to = c(0, 10))


# We create a column for the t-test.

# We initialize an empty vector to store p-values.

p_values <- c()

# We list the variables to perform the t-tests on.

variables <- c("populism_mean", "populism_cfa_resc_table", "populism_cfa_cov_3_table", "antielitism", "generalwill", "manichean", "indivisible", 
               "peoplecentrism", "lroverall", "lrecon", "immigration", "nativism", "authoritarian",
               "laworder", "lifestyle", "eu")

# We perform the t-tests and store the p-values.

for (var in variables) {
  wave1_data <- df_integrated_table %>% dplyr::filter(wave == "Wave 1 - 2018") %>% pull(var)
  wave2_data <- df_integrated_table %>% dplyr::filter(wave == "Wave 2 - 2023") %>% pull(var)
  
  t_test_result <- t.test(wave1_data, wave2_data, var.equal = FALSE) # This preforms the Welch's t-test.
  p_values <- c(p_values, t_test_result$p.value)
}

# We add the p-values and variables to the dataframe.

p_values_df <- data.frame(Variable = variables, P_Value = round(p_values, 2))


# We create the descriptive table with vtable.
# We first rename the variables. 

labs_waves_shorter <- data.frame(populism_mean = 'Populism (Mean)',
                                 populism_cfa_resc_table = 'Populism (CFA: Rescaled)',
                                 populism_cfa_cov_3_table = 'Populism (CFA: Standardized)',
                                 antielitism = 'Anti-Elitism',
                                 generalwill = 'Genernal Will',
                                 manichean   = 'Manichean',
                                 indivisible = 'Indivisible',
                                 peoplecentrism = 'People Centrism',
                                 lroverall = 'Left-Right Overall',
                                 lrecon = 'Left-Right Economy',
                                 immigration = 'Immigration',
                                 nativism = 'Nativism',
                                 authoritarian = 'Authoritarian',
                                 laworder = 'Law and Order',
                                 lifestyle = 'Lifestyle',
                                 eu = 'European Integration',
                                 wave = 'Wave')


# We create the descriptive table with vtable.

table_waves_shorter <- df_integrated_table %>%
  select(populism_mean,
         populism_cfa_resc_table,
         populism_cfa_cov_3_table,
         antielitism,
         generalwill, 
         manichean,  
         indivisible, 
         peoplecentrism, 
         lroverall, 
         lrecon,
         immigration,
         nativism, 
         authoritarian,
         laworder,
         lifestyle,
         eu,
         wave) %>%
  mutate(wave = forcats::fct_rev(wave) ) %>%
  vtable::sumtable(labels = labs_waves_shorter,
                   group = "wave",
                   out = 'return')

# We do some manipulation to join the p-value column to the dataset.

# We insert an empty value at the top of the p_values_df to align with the header row.
p_values_with_header <- c(NA, p_values_df$P_Value)

# We convert it back to a dataframe for consistency.
p_values_with_header_df <- data.frame(P_Value = p_values_with_header)

# We append the p-values column to the table, which now includes a placeholder for the header row.
table_waves_shorter <- cbind(table_waves_shorter, p_values_with_header_df)

# We replace NA in the P_Value column with an empty string and rename the column. 
table_waves_shorter$P_Value[is.na(table_waves_shorter$P_Value)] <- ""

# We rename the P_Value column.
colnames(table_waves_shorter)[colnames(table_waves_shorter) == "P_Value"] <- "T-Test"

# We remove the first row from the data.
table_waves_shorter <- table_waves_shorter[-1, ]


table_waves_shorter %>%
  kable(
    format = "latex",  
    booktabs = TRUE,
    linesep = "",
    row.names = FALSE
  ) %>%
  kable_classic() %>% 
  kable_styling(
    full_width = FALSE, 
    position = "center", 
    bootstrap_options = c("striped", "hover"),
  ) %>%
  column_spec(2, width = "8em") %>%
  column_spec(5, width = "8em") %>%
  add_header_above(c(" " = 1, "Wave 2 (2023)" = 3, "Wave 1 (2018)" = 3, " " = 1)) %>%
  row_spec(0, bold = TRUE) %>%
  footnote(general = "Welch two-sided t-test",
           general_title = "Note:", 
           footnote_as_chunk = TRUE)



## ----------------------------------------------------------------------------------------
#| echo: false
#| output: asis
#| label: tbl-latent-mean
#| tbl-cap: Latent Mean for Populism CFA

# We create a table for the latent means.
# To avoid colliding variables with Lavaan we create a new dataset for the latent means.

df_integrated_latent_means <- df_integrated


# We run the same model for the CFA.

cfa_cov_3_means <- '

populism_cfa_cov_3_means =~ manichean + generalwill + peoplecentrism + antielitism + indivisible

generalwill ~~ indivisible
peoplecentrism ~~ antielitism
manichean ~~  antielitism

'

# We run a  model to obtain the latent means.

fit_cov_3_means <- cfa(model = cfa_cov_3_means, data = df_integrated_latent_means,
                            group = "wave",
                            group.equal = c("loadings", "intercepts"))    



# We extract parameter estimates including latent means.

estimates <- parameterEstimates(fit_cov_3_means)


# We create a table with the latent means. 

# We prepare the data as a dataframe.
estimates_table <- estimates %>%
  dplyr::filter(lhs == "populism_cfa_cov_3_means",
                op == "~1",      
                group == 2) %>%  # group 2 is Wave 2 (estimated)
  select(
    Variable = lhs,
    Est = est,
    SE = se,
    `p-value` = pvalue
  ) %>%
  mutate(
    Variable = "Difference in Populism (Wave 2 - Wave 1)",
    Est = round(Est, 3),
    SE = round(SE, 3),
    `p-value` = round(`p-value`, 3)
  )

# We create table with kableExtra.

estimates_table %>%
  kable(
    format = "latex", 
    booktabs = TRUE, 
    col.names = c("Variable", "Estimate", "Std. Error", "p-value"),
    escape = FALSE
  ) %>%
  kable_classic() %>% 
  kable_styling(
    full_width = FALSE, 
    position = "center",
  )



## ----------------------------------------------------------------------------------------
#| echo: false
#| label: tbl-factor-analysis
#| tbl-cap: Exploratory Factor Analysis

# We run a EFA with maximum likelihood estimation.
# Since we only have on dimension we cannot rotate.

factor_full_dataset <- df_integrated %>%
  select(all_of(populist_items)) %>%
  rename_with(~ names(populist_items), everything())%>%
  psych::fa(nfactors = 1, SMC = TRUE, fm = 'ml')

factor_wave1_dataset <- df_integrated %>%
  dplyr::filter(wave == "Wave 1 - 2018") %>%
  select(all_of(populist_items)) %>%
  rename_with(~ names(populist_items), everything())%>%
  psych::fa(nfactors = 1, SMC = TRUE, fm = 'ml')

factor_wave2_dataset <- df_integrated %>%
  dplyr::filter(wave == "Wave 2 - 2023") %>%
  select(all_of(populist_items)) %>%
  rename_with(~ names(populist_items), everything())%>%
  psych::fa(nfactors = 1, SMC = TRUE, fm = 'ml')


# We create a table to show the results.

# We create a function to prepare loadings for the data frame.

prepare_loadings_df <- function(factor_result) {
  loadings <- as.data.frame(factor_result$loadings[])
  loadings$Communality <- factor_result$communality
  loadings$Uniqueness <- factor_result$uniquenesses
  loadings$Complexity <- factor_result$complexity
  loadings <- tibble::rownames_to_column(loadings, "Variable")
  loadings <- loadings %>% mutate(across(where(is.numeric), ~round(.x, 3)))
  loadings <- rename(loadings, `Factor 1` = `ML1`)
  return(loadings)
}

# We prepare the loadings for the the dataframes.

loadings_full <- prepare_loadings_df(factor_full_dataset)
loadings_wave1 <- prepare_loadings_df(factor_wave1_dataset)
loadings_wave2 <- prepare_loadings_df(factor_wave2_dataset)



# We combine the results into a single table.

# We create the headings for each section.

heading_full <- data.frame(
  Variable = "Full Dataset", 
  `Factor 1` = NA, 
  Communality = NA, 
  Uniqueness = NA, 
  Complexity = NA
)

heading_wave1 <- data.frame(
  Variable = "Wave 1", 
  `Factor 1` = NA, 
  Communality = NA, 
  Uniqueness = NA, 
  Complexity = NA
)

heading_wave2 <- data.frame(
  Variable = "Wave 2", 
  `Factor 1` = NA, 
  Communality = NA, 
  Uniqueness = NA, 
  Complexity = NA
)

# We combine the results into a single table.

efa_table <- bind_rows(
  heading_full, loadings_full,
  heading_wave1, loadings_wave1,
  heading_wave2, loadings_wave2
) %>%
  select(-Factor.1) %>%
  relocate(`Factor 1`, .after = Variable) %>%
  mutate(across(everything(), ~ifelse(is.na(.), "", .)))

efa_table %>%
  kable(format = "latex", booktabs = TRUE, escape = FALSE, linesep = "") %>%
  kable_classic %>% 
  kable_styling(full_width = FALSE, font_size = 8) %>%
  row_spec(which(efa_table$Variable %in% c("Full Dataset", "Wave 1", "Wave 2")), bold = TRUE) %>%
  column_spec(1, width = "12em") %>%  
  column_spec(2:5, width = "6em")    



## ----------------------------------------------------------------------------------------
#| echo: false
#| eval: true
#| output: false

# We run the full CFA models for all four models (baseline model; 1 residual covariance; 2 residual covariances; 3 residual convariances).
# We also produce the fit for the models and we produce the tables for the models. 
# We only use the models with 3 convariances in the main document for the analyses.
# We render the other models in the tables in the Appendix. 
# We also show the modification indices in the Appendix with a discussion of the proofs. 


# --------------

# We fit the CFA for the populist items. 

# 1. This is the Baseline Model. 

# Define the model

cfa_basic_full <- '

populism_cfa_basic =~ manichean + generalwill + peoplecentrism + antielitism + indivisible

'

# Fit the model to the data

fit_basic_full <- cfa(model = cfa_basic_full, data = df_integrated)


# Summarize the results

# summary(fit_basic_full, fit.measures = TRUE, standardized = TRUE)

# modificationIndices(fit_basic_full, sort = TRUE)

# The iterations ends normally and the degrees of freedom are 5.
# But the `fit_basic_full` model does not produce a very good fit.


# 2. This model will allow the residuals of 'generalwill' and 'indivisible' to covary.
# The modification index (MI) from the baseline model indicates a high value for the residual covariance of 'generalwill' and 'indivisible', suggesting that adding this parameter would significantly improve the model fit.
# Theoretically, it is reasonable to assume that experts might have interpreted these two items in a similar way, which would affect the residual/error terms of these variables. Therefore, specifying this residual covariance helps account for shared variance due to similar interpretations.


# Define the model with residual covariances

cfa_cov_1_full <- '

  populism_cfa_cov_1 =~ manichean + generalwill + peoplecentrism + antielitism + indivisible
  
  generalwill ~~ indivisible
  
'


# Fit the model to the data

fit_cov_1_full <- cfa(model = cfa_cov_1_full, data = df_integrated)


# Summarize the results

# summary(fit_cov_1_full, fit.measures = TRUE, standardized = TRUE)

# modificationIndices(fit_cov_1_full, sort = TRUE)


# The model ends normally and there are 4 degrees of freedom.
# The model `cfa_cov_1_full` produces a better fit but we can still improve on the fit. 


# 3. This model allows the residuals of 'generalwill' and 'indivisible' to covary and in addition it covaries 'manichean' and 'peoplecentrism'. 
# The modification index (MI) from the model with 1 residual covariance indicates a high value for the residual covariance between 'manichean' and 'peoplecentrism', suggesting that adding this parameter would significantly improve the model fit. 
# There is also theoretical reasons for why these two items might share residuals/ error terms.
# As we will see, there appears to be, however, an issue of negative correlation between these items. 

cfa_cov_2_full_manichean <- '

populism_cfa_cov_2 =~ manichean + generalwill + peoplecentrism + antielitism + indivisible

generalwill ~~ indivisible
manichean ~~ peoplecentrism

'


# Fit the model to the data

fit_cov_2_full_manichean <- cfa(model = cfa_cov_2_full_manichean, data = df_integrated)


# Summarize the results

# summary(fit_cov_2_full_manichean, fit.measures = TRUE, standardized = TRUE)

# modificationIndices(fit_cov_2_full_manichean, sort = TRUE)

# There are issues with this model. We see that there are negative correlations between `manichean ~~ peoplecentrism`
# If we run the modification index we see that there is an issue of correlation between `manichean peoplecentrism`


# 4. The previous model covaried `manichean ~~ peoplecentrism`. This produced a negative residual covariance. Subsequent analysis with this route showed problems.
# We, therefore run a new model which allows the residuals of 'generalwill' and 'indivisible' to covary and in addition it covaries 'peoplecentrism' and 'antielitism'. 
# Since the previous model where we include the covariation of `manichean ~~ peoplecentrism` produced issues we have opted for this second route. 
# The modification index (MI) from the model with 1 convariance indicates a high value for the residual covariance between 'peoplecentrism' and 'antielitism', suggesting that adding this parameter would significantly improve the model fit. 
# There is also theoretical reasons for why these two items might share residuals/ error terms. 


cfa_cov_2_full <- '

populism_cfa_cov_2 =~ manichean + generalwill + peoplecentrism + antielitism + indivisible

generalwill ~~ indivisible
peoplecentrism ~~ antielitism

'


# Fit the model to the data

fit_cov_2_full <- cfa(model = cfa_cov_2_full, data = df_integrated)


# Summarize the results

# summary(fit_cov_2_full, fit.measures = TRUE, standardized = TRUE)

# modificationIndices(fit_cov_2_full, sort = TRUE)

# The model ended normally with 3 degrees of freedom. 
# The model fit is good but we can still improve the model. 

# 5. We run the modification indices again. We see that two sets of items produce high values: `manichean ~~ antielitism` and `manichean ~~ peoplecentrism`. We choose to covary `manichean ~~ antielitism` do to the earlier issues with `manichean ~~ peoplecentrism`.
# This model allows the residuals of 'generalwill' and 'indivisible' to covary, peoplecentrism and antielitism (i.e. as in the previous model) and in addition it now covaries 'manichean' and 'antielitism'. 
# There is also theoretical reasons for why these two items might share residuals/ error terms. 


cfa_cov_3_full <- '

populism_cfa_cov_3 =~ manichean + generalwill + peoplecentrism + antielitism + indivisible

generalwill ~~ indivisible
peoplecentrism ~~ antielitism
manichean ~~  antielitism

'

# Fit the model to the data

fit_cov_3_full <- cfa(model = cfa_cov_3_full, data = df_integrated)


# Summarize the results

# summary(fit_cov_3_full, fit.measures = TRUE, standardized = TRUE)

# modificationIndices(fit_cov_3_full, sort = TRUE)


# The model ends normally with 2 degrees of freedom. 
# The model produces a very good fit. 
# The modification indices show no more high values. 


# --------- Multigroup models -----------------

# We run each model as a multigroup model. 

fit_mgm_basic <- cfa(cfa_basic_full, data = df_integrated, group = "wave")

# summary(fit_mgm_basic, fit.measures = TRUE, standardized = TRUE)

fit_mgm_cov_1 <- cfa(cfa_cov_1_full, data = df_integrated, group = "wave")

# summary(fit_mgm_cov_1, fit.measures = TRUE, standardized = TRUE)

fit_mgm_cov_2 <- cfa(cfa_cov_2_full, data = df_integrated, group = "wave")

# summary(fit_mgm_cov_2, fit.measures = TRUE, standardized = TRUE)

fit_mgm_cov_3 <- cfa(cfa_cov_3_full, data = df_integrated, group = "wave")

# summary(fit_mgm_cov_3, fit.measures = TRUE, standardized = TRUE)



# We create tables for the CFA models. 
# We only show the models with 3 residual covariances in the main text. The rest we show in the Appendix. 

# We create tables for the CFA. 

vlabs_five_basic <- c("populism_cfa_basic" = "Populism" , "manichean" = "Manichean" , "generalwill" = "General Will" , "peoplecentrism" = " People Centrism" , "antielitism" = "Anti-Elitism" , "indivisible" = "Indivisible")

vlabs_five_cov_1 <- c("populism_cfa_cov_1" = "Populism" , "manichean" = "Manichean" , "generalwill" = "General Will" , "peoplecentrism" = " People Centrism" , "antielitism" = "Anti-Elitism" , "indivisible" = "Indivisible")

vlabs_five_cov_2 <- c("populism_cfa_cov_2" = "Populism" , "manichean" = "Manichean" , "generalwill" = "General Will" , "peoplecentrism" = " People Centrism" , "antielitism" = "Anti-Elitism" , "indivisible" = "Indivisible")

vlabs_five_cov_3 <- c("populism_cfa_cov_3" = "Populism" , "manichean" = "Manichean" , "generalwill" = "General Will" , "peoplecentrism" = " People Centrism" , "antielitism" = "Anti-Elitism" , "indivisible" = "Indivisible")


semTable(fit_basic_full, type = "latex",
         columns = c("est", "se", "p"),
         columnLabels = c(est = "Estimate",
                          se = "Std. Err.",
                          p = "p-value"),
         varLabels = vlabs_five_basic,
         fits = c("cfi", "chisq", "rmsea", "rmsea.pvalue", "srmr"), 
         file = "cfa_tables/fit_basic_full.tex") 

 semTable(fit_cov_1_full, type = "latex",
         columns = c("est", "se", "p"),
         columnLabels = c(est = "Estimate",
                          se = "Std. Err.",
                          p = "p-value"),
         varLabels = vlabs_five_cov_1,
         fits = c("cfi", "chisq", "rmsea", "rmsea.pvalue", "srmr"), 
         file = "cfa_tables/fit_cov_1_full.tex") 

  semTable(fit_cov_2_full, type = "latex",
         columns = c("est", "se", "p"),
         columnLabels = c(est = "Estimate",
                          se = "Std. Err.",
                          p = "p-value"),
         varLabels = vlabs_five_cov_2,
         fits = c("cfi", "chisq", "rmsea", "rmsea.pvalue", "srmr"), 
         file = "cfa_tables/fit_cov_2_full.tex") 
  
  
  semTable(fit_cov_3_full, type = "latex",
         columns = c("est", "se", "p"),
         columnLabels = c(est = "Estimate",
                          se = "Std. Err.",
                          p = "p-value"),
         paramSets = c("loadings", "residualcovariances"),
         varLabels = vlabs_five_cov_3,
         fits = c("cfi", "chisq", "rmsea", "rmsea.pvalue", "srmr"), 
         file = "cfa_tables/fit_cov_3_full_paper.tex") 
  
  
  semTable(fit_cov_3_full, type = "latex",
         columns = c("est", "se", "p"),
         columnLabels = c(est = "Estimate",
                          se = "Std. Err.",
                          p = "p-value"),
         varLabels = vlabs_five_cov_3,
         fits = c("cfi", "chisq", "rmsea", "rmsea.pvalue", "srmr"), 
         file = "cfa_tables/fit_cov_3_full_appendix.tex") 
  
  

# We create tables for the multigroup. 
# We only show the models with 3 covarinces in the main text. The rest we show in the Appendix. 
# We use `estsestars` for the multigroup to save space in the table.
  

semTable(fit_mgm_basic, type = "latex",
         columns = c("estsestars"),
         paramSets = c("loadings", "intercepts", "residualvariances", "latentvariances"),
         varLabels = vlabs_five_basic,
         fits = c("cfi", "chisq", "rmsea", "rmsea.pvalue", "srmr"), 
         file = "cfa_tables/fit_mgm_basic.tex") 

semTable(fit_mgm_cov_1, type = "latex",
         columns = c("estsestars"),
         paramSets = c("loadings", "intercepts", "residualcovariances", "residualvariances", "latentvariances"),
         varLabels = vlabs_five_cov_1,
         fits = c("cfi", "chisq", "rmsea", "rmsea.pvalue", "srmr"), 
         file = "cfa_tables/fit_mgm_cov_1.tex") 

semTable(fit_mgm_cov_2, type = "latex",
         columns = c("estsestars"),
         paramSets = c("loadings", "intercepts", "residualcovariances", "residualvariances", "latentvariances"),
         varLabels = vlabs_five_cov_2,
         fits = c("cfi", "chisq", "rmsea", "rmsea.pvalue", "srmr"), 
         file = "cfa_tables/fit_mgm_cov_2.tex") 


semTable(fit_mgm_cov_3, type = "latex",
         columns = c("estsestars"),
         paramSets = c("loadings", "residualcovariances", "latentvariances"),
         varLabels = vlabs_five_cov_3,
         fits = c("cfi", "chisq", "rmsea", "rmsea.pvalue", "srmr"), 
         file = "cfa_tables/fit_mgm_cov_3_paper.tex")


semTable(fit_mgm_cov_3, type = "latex",
         columns = c("estsestars"),
         paramSets = c("loadings", "intercepts", "residualcovariances", "residualvariances", "latentvariances"),
         varLabels = vlabs_five_cov_3,
         fits = c("cfi", "chisq", "rmsea", "rmsea.pvalue", "srmr"), 
         file = "cfa_tables/fit_mgm_cov_3_appendix.tex") 




## ----------------------------------------------------------------------------------------
#| echo: false
#| output: asis
#| tbl-cap: CFA with Three Residual Covariances (Unstandardized Estimates)
#| label: tbl-cfa3

# The table for the CFA model with three residual covariances. 
         
cat('
\\begin{table}
\\centering
\\footnotesize  
\\input{cfa_tables/fit_cov_3_full_paper.tex}
\\hspace*{.25cm}{\\parbox{0.6\\linewidth}{\\textit{Note}: All estimates are unstandardized.}}
\\end{table}
')



## ----------------------------------------------------------------------------------------
#| echo: false
#| output: asis
#| tbl-cap: Multigroup CFA with Three Residual Covariances (Unstandardized Estimates)
#| label: tbl-cfa-mg-3


# The table for the multigroup model with three residual covariances.

cat('
\\begin{table}
\\centering
\\footnotesize  
\\input{cfa_tables/fit_mgm_cov_3_paper.tex}
\\hspace*{-1.5cm}{\\parbox{0.6\\linewidth}{\\textit{Note}: All estimates are unstandardized.}}
\\end{table}
')



## ----------------------------------------------------------------------------------------
#| echo: false

# We run the invariance tests now but we only use them in the Appendix. 
# We do this to avoid variables colliding with CFA's when we create the populism CFA variable from the regression scores. 

# We run the models for the Baseline Model.
configural_basic <- cfa(cfa_basic_full, data = df_integrated, group = "wave")
metric_basic <- cfa(cfa_basic_full, data = df_integrated, group = "wave", group.equal = "loadings")
scalar_basic <- cfa(cfa_basic_full, data = df_integrated, group = "wave", group.equal = c("loadings", "intercepts"))
strict_basic <- cfa(cfa_basic_full, data = df_integrated, group = "wave", group.equal = c("loadings", "intercepts", "residuals"))

lavTestLRT_results_basic <- lavTestLRT(configural_basic, metric_basic, scalar_basic, strict_basic)
lavTestLRT_results_basic_df <- as.data.frame(lavTestLRT_results_basic)
lavTestLRT_results_basic_df <- lavTestLRT_results_basic_df %>% 
  tibble::rownames_to_column("model_raw") %>%
  mutate(Model = case_when(
    model_raw == "configural_basic" ~ "Configural",
    model_raw == "metric_basic"     ~ "Metric",
    model_raw == "scalar_basic"     ~ "Scalar",
    model_raw == "strict_basic"     ~ "Strict",
    .default                        = model_raw
  )) %>%
  select(Model, everything(), -model_raw)

# We run the models for the one residual covariance model.
configural_cov_1 <- cfa(cfa_cov_1_full, data = df_integrated, group = "wave")
metric_cov_1 <- cfa(cfa_cov_1_full, data = df_integrated, group = "wave", group.equal = "loadings")
scalar_cov_1 <- cfa(cfa_cov_1_full, data = df_integrated, group = "wave", group.equal = c("loadings", "intercepts"))
strict_cov_1 <- cfa(cfa_cov_1_full, data = df_integrated, group = "wave", group.equal = c("loadings", "intercepts", "residuals"))

lavTestLRT_results_cov_1 <- lavTestLRT(configural_cov_1, metric_cov_1, scalar_cov_1, strict_cov_1)
lavTestLRT_results_cov_1_df <- as.data.frame(lavTestLRT_results_cov_1)
lavTestLRT_results_cov_1_df <- lavTestLRT_results_cov_1_df %>% 
  tibble::rownames_to_column("model_raw") %>%
  mutate(Model = case_when(
    model_raw == "configural_cov_1" ~ "Configural",
    model_raw == "metric_cov_1"     ~ "Metric",
    model_raw == "scalar_cov_1"     ~ "Scalar",
    model_raw == "strict_cov_1"     ~ "Strict",
    .default                        = model_raw
  )) %>%
  select(Model, everything(), -model_raw)


# We run the models for the two residual covariance model.
configural_cov_2 <- cfa(cfa_cov_2_full, data = df_integrated, group = "wave")
metric_cov_2 <- cfa(cfa_cov_2_full, data = df_integrated, group = "wave", group.equal = "loadings")
scalar_cov_2 <- cfa(cfa_cov_2_full, data = df_integrated, group = "wave", group.equal = c("loadings", "intercepts"))
strict_cov_2 <- cfa(cfa_cov_2_full, data = df_integrated, group = "wave", group.equal = c("loadings", "intercepts", "residuals"))

lavTestLRT_results_cov_2 <- lavTestLRT(configural_cov_2, metric_cov_2, scalar_cov_2, strict_cov_2)
lavTestLRT_results_cov_2_df <- as.data.frame(lavTestLRT_results_cov_2)
lavTestLRT_results_cov_2_df <- lavTestLRT_results_cov_2_df %>% 
  tibble::rownames_to_column("model_raw") %>%
  mutate(Model = case_when(
    model_raw == "configural_cov_2" ~ "Configural",
    model_raw == "metric_cov_2"     ~ "Metric",
    model_raw == "scalar_cov_2"     ~ "Scalar",
    model_raw == "strict_cov_2"     ~ "Strict",
    .default                        = model_raw
  )) %>%
  select(Model, everything(), -model_raw)


# We run the models for the three residual covariance model.
configural_cov_3 <- cfa(cfa_cov_3_full, data = df_integrated, group = "wave")
metric_cov_3 <- cfa(cfa_cov_3_full, data = df_integrated, group = "wave", group.equal = "loadings")
scalar_cov_3 <- cfa(cfa_cov_3_full, data = df_integrated, group = "wave", group.equal = c("loadings", "intercepts"))
strict_cov_3 <- cfa(cfa_cov_3_full, data = df_integrated, group = "wave", group.equal = c("loadings", "intercepts", "residuals"))

lavTestLRT_results_cov_3 <- lavTestLRT(configural_cov_3, metric_cov_3, scalar_cov_3, strict_cov_3)
lavTestLRT_results_cov_3_df <- as.data.frame(lavTestLRT_results_cov_3)
lavTestLRT_results_cov_3_df <- lavTestLRT_results_cov_3_df %>% 
  tibble::rownames_to_column("model_raw") %>%
  mutate(Model = case_when(
    model_raw == "configural_cov_3" ~ "Configural",
    model_raw == "metric_cov_3"     ~ "Metric",
    model_raw == "scalar_cov_3"     ~ "Scalar",
    model_raw == "strict_cov_3"     ~ "Strict",
    .default                        = model_raw
  )) %>%
  select(Model, everything(), -model_raw)


# This is for the table with all of the models. 

# We add a column to indicate the section.
lavTestLRT_results_basic_df <- lavTestLRT_results_basic_df %>% mutate(Section = "Model: Baseline")
lavTestLRT_results_cov_1_df <- lavTestLRT_results_cov_1_df %>% mutate(Section = "Model: Res. Cov. 1")
lavTestLRT_results_cov_2_df <- lavTestLRT_results_cov_2_df %>% mutate(Section = "Model: Res. Cov. 2")
lavTestLRT_results_cov_3_df <- lavTestLRT_results_cov_3_df %>% mutate(Section = "Model: Res. Cov. 3") 


# We create a table of the invariance test results. 
# We do this here to keep everything together.
# We use this in the Appendix.


# We combine all dataframes.
combined_lavTestLRT_results <- bind_rows(
  lavTestLRT_results_basic_df,
  lavTestLRT_results_cov_1_df,
  lavTestLRT_results_cov_2_df,
  lavTestLRT_results_cov_3_df
)

# We ensure correct ordering of columns.
combined_lavTestLRT_results <- combined_lavTestLRT_results %>%
  rename(`Model Group` = Section) %>% 
  select(`Model Group`, Model, Df, AIC, BIC, Chisq, `Chisq diff`, `Df diff`, `Pr(>Chisq)`)




## ----------------------------------------------------------------------------------------
#| echo: false
#| eval: true
#| include: false

# We run IRT models for the whole dataset. 

df_irt_five_full <- df_integrated %>%
  select(manichean,                
         generalwill,       
         peoplecentrism,    
         antielitism,
         indivisible)

df_irt_five_full <- na.omit(df_irt_five_full)

# We round the values to the nearest integer.  

df_irt_five_full <- df_irt_five_full %>%
  mutate(across(everything(), round)) 

# We fit the model using the GRM for polytomous (ordered) items.

model_graded <- 'Populism = 1-5'

fit_5_full <- mirt(data = df_irt_five_full, model = model_graded, itemtype = "graded")




## ----------------------------------------------------------------------------------------
#| echo: false
#| fig-width: 12
#| fig-height: 10
#| label: fig-irt-model-combined

# We create a function for the Item Information plots. 
# Since we render the table a few times with different versions of the dataset, the function eliminates possible overlaps. 

# Function to extract information data.

extract_info_data <- function(fit_model, theta_values, item_names) {
  info_results <- data.frame()
  for (i in 1:extract.mirt(fit_model, 'nitems')) {
    item <- extract.item(fit_model, i)
    info_values <- iteminfo(item, Theta = theta_values)
    info_df <- data.frame(Theta = theta_values, Information = info_values, Item = item_names[i])
    info_results <- bind_rows(info_results, info_df)
  }
  return(info_results)
}

# We create a function for the Item Probability Function. 
# Since we render the table a few times with different versions of the dataset, the function eliminates possible overlaps. 

# Function to extract probability data.

extract_prob_data <- function(fit_model, theta_values, item_names) {
  results <- data.frame()
  
  for (i in 1:extract.mirt(fit_model, 'nitems')) {
    item <- extract.item(fit_model, i)
    probabilities <- probtrace(item, Theta = theta_values)
    item_df <- as.data.frame(probabilities)
    item_df$Theta <- theta_values
    item_df$Item <- item_names[i]
    
# We reshape the data to long format.
    
    item_df <- item_df %>%
      pivot_longer(cols = starts_with("P"), 
                   names_to = "Category", 
                   values_to = "Probability")
    
# We convert the Category to a factor with specified levels.
    
    item_df$Category <- factor(item_df$Category, levels = paste0("P.", 1:11))
    
    results <- bind_rows(results, item_df)
  }
  
  return(results)
}


# We render full dataset for the Item Information plots.
# We define the sequence of theta values and item names for the full dataset.
# These will be reused for both the Item Information and Item Probability calculations.

theta_values_full <- seq(-3, 3, by = 0.1)
item_names_full <- colnames(extract.mirt(fit_5_full, 'data'))

info_results_full <- extract_info_data(fit_5_full, theta_values_full, item_names_full)

# We plot the information data.

plot_item_inf_full <- info_results_full %>%
  mutate(Item = forcats::fct_recode(Item, !!!populist_items), 
         Item = forcats::fct_relevel(Item, !!!populism_items_order)) %>%
  ggplot(aes(x = Theta, y = Information, color = Item)) +
  geom_line(linewidth = 1) +
  facet_wrap(~ Item) +
  theme_bw() +
  labs(title = "Item Information (Full Dataset)",
       x = expression(theta),
       y = "Information") +  
  theme(plot.title = element_text(hjust = 0.5), 
        legend.position = "bottom",
        legend.direction = "horizontal",
        legend.box = "horizontal") +  
  guides(color = guide_legend(title = "Items", nrow = 1), 
         linetype = guide_legend(title = "Items", nrow = 1))


# We render full dataset for Item Probability Function. 
# We reuse the theta_values_full and item_names_full variables defined above.

results_full <- extract_prob_data(fit_5_full, theta_values_full, item_names_full)


# We plot the probability data.

# We create a vector so that the colours are the same between plots for the levels. 

category_levels <- c("P.1", "P.2", "P.3",  "P.4",  "P.5",  "P.6",  "P.7",  "P.8", "P.9",  "P.10", "P.11")


# We named the vector for the colors.

category_colors <- setNames(custom_palette, category_levels)


lty_vector <- c(rep("solid", 7), rep("dashed", 4))

plot_ipf_full <- results_full %>%
  mutate(Item = forcats::fct_recode(Item, !!!populist_items), 
         Item = forcats::fct_relevel(Item, !!!populism_items_order)) %>%
  ggplot(aes(x = Theta, y = Probability, color = Category, linetype = Category)) +
  geom_line(linewidth = 1) +
  facet_wrap(~ Item, scales = "free_y") +
  theme_bw() +
  labs(title = "Item Probability Function (Full Dataset)",
       x = expression(theta),
       y = expression(P(theta)),
       color = "Items") +
  scale_color_manual(values = category_colors) +
  scale_linetype_manual(values = lty_vector) +  
  theme(plot.title = element_text(hjust = 0.5), 
        legend.position = "bottom",
        legend.direction = "horizontal",
        legend.box = "horizontal") +  
  guides(color = guide_legend(title = "Items", nrow = 1), 
         linetype = guide_legend(title = "Items", nrow = 1))

# We combine the plots.

plot_item_inf_full / plot_ipf_full +
  plot_annotation(tag_levels = '1')



## ----------------------------------------------------------------------------------------
#| echo: false

# Create the populism variable from the factor scores. 

# We first make the populism variable from the regression scores from the CFA. 
# We attach the regression scores from the model with three residual covariances from the full dataset.
# We use the function from above. 

df_integrated <- attach_factor_scores(df_integrated, fit_cov_3_full)

# We rescale the variable

df_integrated$populism_cfa_resc <- scales::rescale(df_integrated$populism_cfa_cov_3, to = c(0, 10))



## ----------------------------------------------------------------------------------------
#| echo: false
#| warning: false
#| fig-height: 6
#| fig-width: 6
#| fig-cap: "Degrees of Populism per Party Family"
#| label: fig-violin-plots


# We create a violin plot for populism per party family. 

# We create a vector to reorder the party families. 

family_reorder <- c("Radical Right",
                    "Radical Left",
                    "Christian Democratic",
                    "Social Democratic",
                    "Conservatives",
                    "Green",
                    "Liberal",              
                    "Regionalist",
                    "Confessional",
                    "Agrarian",             
                    "No family") 



# This is the violin plot.

local({
  set.seed(1234)  # We use a seed to ensure that the labels are the same every time we render the plot. This seed only affects this block.

df_integrated %>%
  mutate(family_label = forcats::fct_rev(forcats::fct_relevel(family_label, family_reorder))) %>% 
  dplyr::filter(family_label != "Confessional") %>%
  group_by(family_label) %>%
  mutate(
    lower_bound = quantile(populism_cfa_resc, 0.10, na.rm = TRUE),  
    upper_bound = quantile(populism_cfa_resc, 0.90, na.rm = TRUE)   
  ) %>%
  ungroup() %>%
  ggplot(aes(x = family_label, y = populism_cfa_resc, colour = regions)) + #fill = family_label, 
  geom_violin(trim = TRUE, color = "black") +
  geom_jitter(shape = 16, position = position_jitter(0.2)) +
  geom_text_repel(aes(label = ifelse(populism_cfa_resc < lower_bound | populism_cfa_resc > upper_bound, 
                                     party_short, 
                                     NA),
                      color = regions),  # Color text by region
                  size = 2, 
                  max.overlaps = 30,
                  show.legend = FALSE) +  
  
  stat_summary(fun = median, geom = "point", shape = 23, size = 3, fill = "red", color = "black") +
  coord_flip() +
  scale_y_continuous(limits = c(0, 10.5)) + 
  theme_bw() +
  labs(x = "",
       y = "",
       colour = "Regions") +
  guides(
    fill = "none"
  ) +
  theme(
    legend.background=element_blank(),
    plot.tag = element_text(size = 16),
    plot.title = element_text(hjust = 0.5),
    axis.title.y = element_text(size = 16),
    axis.title.x = element_text(size = 16),
    legend.position = "inside",
    legend.position.inside = c(0.1, 0.88))

})



## ----------------------------------------------------------------------------------------
#| echo: false
#| fig-width: 6
#| fig-height: 6
#| fig-cap: "Comparing Populism per Waves per Party Family"
#| label: fig-party-families-boxplots


# We make a dataset with parties that appear in both waves.
# We then create populism variable from the CFA from the dataset where parties appear in both waves.

# We filter the parties that are in Wave 1 and Wave 2.

df_parties_both_waves_filter <- df_integrated %>%
  group_by(poppa_id) %>%
  dplyr::summarize(wave_count = n_distinct(wave, na.rm = TRUE)) %>%
  dplyr::filter(wave_count == 2) %>%
  select(poppa_id)

# We create a dataset with only the parties that appear in both waves.

df_parties_both_waves <- semi_join(df_integrated, df_parties_both_waves_filter, by = join_by(poppa_id))


# We remove CFA variables form the original dataset to avoid confusion. 

df_parties_both_waves <- df_parties_both_waves %>%
  select(-c(populism_cfa_cov_3, populism_cfa_resc))

# We run a CFA with for three residual covariances with the parties that appear in both datasets. 

cfa_cov_3_both <- '

populism_cfa_cov_3_both =~ manichean + generalwill + peoplecentrism + antielitism + indivisible

generalwill ~~ indivisible
peoplecentrism ~~ antielitism
manichean ~~  antielitism

'

# We fit the model to the data.

fit_cov_3_both <- cfa(model = cfa_cov_3_both, data = df_parties_both_waves)


# We use the function from above to attach the factor scores.

df_parties_both_waves <- attach_factor_scores(df_parties_both_waves, fit_cov_3_both)


# We rescale the factor scores.

df_parties_both_waves$populism_cfa_both_resc <- scales::rescale(df_parties_both_waves$populism_cfa_cov_3_both, to = c(0, 10))


# We mean center nativism, authoritarian and lrecon for the interaction effects below. 

df_parties_both_waves <- df_parties_both_waves %>% 
  mutate(nativism_centered = nativism - mean(nativism, na.rm =TRUE),
         authoritarian_centered = authoritarian - mean(authoritarian, na.rm = TRUE),
         lrecon_centered = lrecon - mean(lrecon, na.rm = TRUE) 
  )


# We create the populism difference between the two waves.
# We use the base R diff function to do this. 
# This creates an new variable with the difference between the second and the first wave.
# The diff function is calculated as Wave 2 minus Wave 1.
# A positive value indicates that populism increased in Wave 2 compared to Wave 1.
# The arrange function is important to ensure that poppid and wave are sorted correctly. 
# We use the new reframe function from dplyr: unlike the summarize function, the reframe function does not eliminate the other rows. 
# We include party names, but only for parties in the top and bottom 10th percentile. We do this so as not to clutter the plot. 


local({
  set.seed(1234)  # We use a seed to ensure that the labels are the same every time we render the plot. This seed only affects this block.
  
  df_parties_both_waves %>%
  dplyr::filter(family_label != "Confessional") %>%
  dplyr::filter(!is.na(populism_cfa_both_resc)) %>% 
  group_by(family_label, poppa_id, party_short, regions) %>%
  arrange(poppa_id, wave) %>%
  reframe(diff_populism = diff(populism_cfa_both_resc)) %>%
  mutate(family_label = forcats::fct_relevel(as.factor(family_label), family_reorder),
         family_label = forcats::fct_rev(family_label)) %>%
  group_by(family_label) %>%
  mutate(Q1 = quantile(diff_populism, 0.10, na.rm = TRUE),
         Q3 = quantile(diff_populism, 0.90, na.rm = TRUE),
         is_outside_box = diff_populism < Q1 | diff_populism > Q3) %>%
  ungroup() %>%
  ggplot(aes(y = family_label, x = diff_populism)) +  
  geom_boxplot() +  
  geom_point(size = 2, alpha = 0.8, aes(group = family_label, colour = regions), stroke = 0.5) +  
  geom_vline(xintercept = 0, linetype = "dashed", color = "red") +  
  geom_text_repel(data = . %>% dplyr::filter(is_outside_box) %>%
                    distinct(family_label, diff_populism, .keep_all = TRUE),
                  aes(label = paste0(party_short)),
                  size = 3, box.padding = 0.6, point.padding = 0.3, max.overlaps = Inf) +  
  theme_bw() +  
  labs(y = "", x = "Positive values indicate higher levels of populism in Wave 2", colour = "Regions") +  
  guides(fill = "none") +
  theme(legend.position = "inside",
        legend.position.inside = c(0.1, 0.3),
        legend.background=element_blank())
})



## ----------------------------------------------------------------------------------------
#| echo: false
#| tbl-cap: Populist Party Characteristics
#| tbl-cap-location: top
#| label: tbl-pop-char-ols


# We rename the variables and define which variables to show in the output for the regression table. 

cm_char <- c(
  'nativism' = 'Nativism',
  'authoritarian' = 'Authoritarianism',
  'lrecon' = 'Left-Right Economy',
  'waveWave 2 - 2023' = 'Wave 2',
  '(Intercept)' = '(Intercept)'
)


# We run the OLS models for positions.

models_pop_char <- list(
  "Model 1"  = lm(populism_cfa_resc ~ nativism + country + wave, data = df_integrated),
  "Model 2"  = lm(populism_cfa_resc ~ lrecon + country + wave, data = df_integrated),
  "Model 3"  = lm(populism_cfa_resc ~ authoritarian + country + wave, data = df_integrated),
  "Model 4"  = lm(populism_cfa_resc ~ nativism + lrecon + country + wave, data = df_integrated)

  )



modelsummary::modelsummary(models_pop_char, 
                           estimate = "{estimate}{stars}",                           
                           output = 'kableExtra',
                           coef_map = cm_char,
                           gof_map = c("nobs", "r.squared")) %>%
  kable_styling(font_size = 7) %>% 
  kableExtra::add_footnote(
    label = c("With country fixed effects", 
              "Standard errors are shown in parentheses",
              "+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001"),
    notation = "none"
  )




## ----------------------------------------------------------------------------------------
#| echo: false
#| tbl-cap: Interaction with Wave
#| label: tbl-interaction-wave

# We now run the regressions for the full dataset and for the dataset with only parties that appear in both waves. 

# We rename the variables and we define which variables to include in the regression table. 

cm_wave_centered <- c(
           'waveWave 2 - 2023' = 'Wave 2',
           'nativism_centered' = 'Nativism (Ctr)',
           'lrecon_centered'  = 'LR Economy (Ctr)',
           'authoritarian_centered' = 'Authoritarianism (Ctr)',
           'waveWave 2 - 2023:nativism_centered' = 'Wave 2 x Nativism (Ctr)',
           'waveWave 2 - 2023:lrecon_centered' =  'Wave 2 x LR Economy (Ctr)',
           'waveWave 2 - 2023:authoritarian_centered' = 'Wave 2 x Authoritarianism (Ctr)',
           '(Intercept)' = '(Intercept)'
         )
         

#  We run the regression models for the full dataset and for the parties that appear in both waves.

models_interaction_wave <- list(
           "Model 1 (FD)" = lm(populism_cfa_resc ~ wave + country, data = df_integrated),
           "Model 2 (FD)" = lm(populism_cfa_resc ~ lrecon_centered + wave*nativism_centered + country, data = df_integrated),
           "Model 3 (FD)" = lm(populism_cfa_resc ~ nativism_centered + wave*lrecon_centered + country, data = df_integrated),
           "Model 4 (FD)" = lm(populism_cfa_resc ~ lrecon_centered + wave*authoritarian_centered + country, data = df_integrated),
           "Model 5 (DSP)" = lm(populism_cfa_both_resc ~ wave + country, data = df_parties_both_waves),
           "Model 6 (DSP)" = lm(populism_cfa_both_resc ~ lrecon_centered + wave*nativism_centered + country, data = df_parties_both_waves),
           "Model 7 (DSP)" = lm(populism_cfa_both_resc ~ lrecon_centered + wave*authoritarian_centered + country, data = df_parties_both_waves)
         )

modelsummary::modelsummary(models_interaction_wave, 
                           estimate = "{estimate}{stars}",
                           output = 'kableExtra',
                           coef_map = cm_wave_centered,
                           gof_map = c("nobs", "r.squared")) %>%
  kable_styling(font_size = 8) %>%
  column_spec(2:7, width = "5em") %>%  
  kableExtra::add_footnote(
    label = c("With country fixed effects; FD: Full dataset; DSP: Dataset with shared parties", 
              "Standard errors are shown in parentheses",
              "+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001"),
    notation = "none"
  )



## ----------------------------------------------------------------------------------------
#| echo: false
#| fig-width: 6
#| fig-height: 10
#| fig-cap: "Marginal Effects for Interaction Effects"
#| label: fig-marginal-effects


# These are the marginal effects plots showing the effect of wave on populism, conditioned by (i.e., moderated by) nativism (centered) and authoritarianism (centered).

interaction_wave_nativism_me <- plot_slopes(models_interaction_wave$`Model 2`, variables = "wave", condition = "nativism_centered") +
labs(title = "Marginal Effects of Wave on Populism, Conditioned by Nativism (Full Dataset)",
     x = "Nativism (Centered)") + 
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") 


interaction_wave_authoritarian_me <- plot_slopes(models_interaction_wave$`Model 4`, variables = "wave", condition = "authoritarian_centered") +
  labs(title = "Marginal Effects of Wave on Populism, Conditioned by Authoritarianism (Full Dataset)",
       x = "Authoritarianism (Centered)") + 
   geom_hline(yintercept = 0, linetype = "dashed", color = "red") 


interaction_wave_nativism_both_me <- plot_slopes(models_interaction_wave$`Model 6`, variables = "wave", condition = "nativism_centered") +
  labs(title = "Marginal Effects of Wave on Populism, Conditioned by Nativism (Parties in Both Waves)",
       x = "Nativism (Centered)") + 
   geom_hline(yintercept = 0, linetype = "dashed", color = "red") 
 


(
  interaction_wave_nativism_me + 
  interaction_wave_authoritarian_me +
  interaction_wave_nativism_both_me +
  plot_layout(nrow = 3, guides = "collect") +
  plot_annotation(tag_levels = '1') 
  ) &  
  theme_bw() &
  theme(plot.title = element_text(hjust = 0.5, size = 9),
        axis.title = element_text(size = 9),
        axis.text.y = element_text(size = 9),
        panel.grid = element_blank()) 




## ----------------------------------------------------------------------------------------
#| echo: false
#| fig-width: 6
#| fig-height: 10
#| fig-cap: "Predictions for Interaction Effects"
#| label: fig-predictions


# These are the prediction plots showing the effect of wave on populism across levels of nativism (centered) and authoritarianism (centered).

interaction_wave_nativism_pred <- plot_predictions(models_interaction_wave$`Model 2`,
                 condition = list("wave", nativism_centered = "threenum")) +
  labs(
    title = "Predictions of Wave on Populism, by Nativism (Full Dataset)",
    x = "",
    y = "Populism",
    colour = "Moderator (Ctr)") 


interaction_wave_authoritarian_pred <- plot_predictions(models_interaction_wave$`Model 4`,
                 condition = list("wave", authoritarian_centered = "threenum")) +
  labs(
    title = "Predictions of Wave on Populism, by Authoritarianism (Full Dataset)",
    x = "",
    y = "Populism",
    colour = "Moderator (Ctr)") 

interaction_wave_nativism_both_pred <- plot_predictions(models_interaction_wave$`Model 6`,
                 condition = list("wave", nativism_centered = "threenum")) +
  labs(
    title = "Predictions of Wave on Populism, by Nativism (Parties in Both Waves)",
    x = "",
    y = "Populism",
    colour = "Moderator (Ctr)")


(
 interaction_wave_nativism_pred + 
 interaction_wave_authoritarian_pred + 
 interaction_wave_nativism_both_pred +
  plot_layout(nrow = 3, guides = "collect") +
  plot_annotation(tag_levels = '1') 
) &
  theme_bw() &
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = .5, size = 10),
    axis.title = element_text(size = 10),
    axis.text.x = element_text(size = 10),
    legend.text = element_text(size = 10) 
  ) 




## ----------------------------------------------------------------------------------------
#| echo: false
#| warning: false
#| tbl-cap: Summary Statistics (Full Dataset)
#| label: tbl-summary-statistics-full-appendix


# Descriptive table for the full dataset, including the CFA populism variable. 
# we use the vtable package.

# We create a new dataset for the descriptive table. 
# We do this in case we need to do some extra data manipulations for the table. 

df_integrated_table_appendix <- df_integrated


# In the published paper we did not include the party families in the descriptive table. We include them here. 
# We do not include the Confessional parties since there were too few. There were only six. As such we have 555 parties in the party families. 


df_integrated_table_appendix <- df_integrated_table_appendix 

# We order the party families for better presentation 

df_integrated_table_appendix <- df_integrated_table_appendix %>%
  mutate(family_label = forcats::fct_relevel(family_label, !!!family_reorder))


# We round the following variables for better presentation in the descriptive table. 
# In the paper for aesthetic reasons we round to less decimal points than in the replication file.
# In the paper we round to 0 decimal points for these variables. 

df_integrated_table_appendix$populism_cfa_cov_3 <- round(df_integrated_table_appendix$populism_cfa_cov_3, digits = 2)  
df_integrated_table_appendix$lrecon_centered <- round(df_integrated_table_appendix$lrecon_centered, digits = 2) 
df_integrated_table_appendix$nativism_centered <- round(df_integrated_table_appendix$nativism_centered, digits = 2) 
df_integrated_table_appendix$authoritarian_centered <- round(df_integrated_table_appendix$authoritarian_centered, digits = 2) 

# We rename the variables. 

labs_appendix <- c(populism_mean = 'Populism (Mean)',
                   populism_cfa_resc = 'Populism (CFA: Rescaled)',
                   populism_cfa_cov_3 = 'Populism (CFA: Standardized)',
                   antielitism = 'Anti-Elitism',
                   generalwill = 'General Will',
                   manichean   = 'Manichean',
                   indivisible = 'Indivisible',
                   peoplecentrism = 'People Centrism',
                   lroverall = 'Left-Right Overall',
                   lrecon = 'Left-Right Economy',
                   immigration = 'Immigration',
                   nativism = 'Nativism',
                   authoritarian = 'Authoritarian',
                   laworder = 'Law and Order',
                   lifestyle = 'Lifestyle',
                   eu = 'European Integration',
                   saliencecult = 'Salience Culture',
                   salienceecon = 'Salience Economy',
                   lrecon_centered = 'Left-Right Economy (Ctr)',
                   nativism_centered = 'Nativism (Ctr)',
                   authoritarian_centered = 'Authoritarian (Ctr)',
                   family_label = 'Party Families',               # This was not included in the published paper. 
                   wave = 'Wave')


# We create the descriptive table with vtable and kableExtra.

table_appendix <- df_integrated_table_appendix %>%
  select(populism_mean, 
         populism_cfa_resc,
         populism_cfa_cov_3,
         antielitism,
         generalwill, 
         manichean,  
         indivisible, 
         peoplecentrism, 
         lroverall, 
         lrecon,
         immigration,
         nativism, 
         authoritarian,
         laworder,
         lifestyle,
         eu,
         saliencecult,
         salienceecon,
         lrecon_centered,
         nativism_centered,
         authoritarian_centered,
         family_label,          # This was not included in the published paper.
         wave) %>%
  mutate(wave = forcats::fct_rev(wave) ) %>%
  vtable::sumtable(
    labels = labs_appendix,
    summ = c('notNA(x)',  
             'mean(x)',   
             'sd(x)',     
             'min(x)',    
             'max(x)'),   
    summ.names = c('N', 'Mean/ Percent', 'Std. Dev.', 'Min', 'Max'), 
    out = 'return'       
  )

table_appendix %>%
  kable(
    format = "latex",  
    booktabs = TRUE,
    linesep = "",
    row.names = FALSE
  ) %>%
  kable_classic %>% 
  kable_styling(
    latex_options = "scale_down",   
    full_width = TRUE,             
    position = "center",           
    font_size = 8
  ) %>%
  column_spec(1, width = "16em") %>%   
  column_spec(2:5, width = "5em") %>% 
  footnote(general = "We do not include the Confessional parties in the analyses in the paper due to the low number of parties.",
         general_title = "",
         footnote_as_chunk = TRUE)




## ----------------------------------------------------------------------------------------
#| echo: false
#| output: asis
#| tbl-cap: CFA Baseline Model (Unstandardized Estimates)
#| label: tbl-cfa-full_unstd

# CFA table for the baseline model
         
cat('
\\begin{table}
\\centering
\\footnotesize  
\\input{cfa_tables/fit_basic_full.tex}
\\hspace*{2cm}{\\parbox{0.6\\linewidth}{\\textit{Note}: All estimates are unstandardized.}}
\\end{table}
')



## ----------------------------------------------------------------------------------------
#| echo: false
#| output: asis
#| tbl-cap: Standardized Factor Loadings for Populism CFA (Baseline Model)
#| label: tbl-cfa-full_baseline_std

# CFA table for the baseline model 
         
standardizedSolution(fit_basic_full) %>%
  dplyr::filter(op == "=~") %>%
  select(rhs, est.std, se, pvalue) %>%
  rename(
    Item = rhs,
    `Standardized Estimate` = est.std,
    `Std. Error` = se,
    `p-value` = pvalue
  ) %>%
  mutate(Item = forcats::fct_recode(factor(Item),
                               Manichean = "manichean",
                               "General Will" = "generalwill",
                               "People Centrism" = "peoplecentrism",
                               "Anti-Elitism" = "antielitism",
                               Indivisible    = "indivisible"
  ) ) %>%
  kable(digits = 3,
        align = "lrrr",
        booktabs = TRUE,
        format = "latex") %>%
  kable_styling(font_size = 8) 



## ----------------------------------------------------------------------------------------
#| echo: false
#| output: asis
#| tbl-cap: CFA with One Residual Covariance (Unstandardized Estimates)
#| label: tbl-cfa-cov-1_unstd

# CFA table for 1 covariance
         
cat('
\\begin{table}
\\centering
\\footnotesize  
\\input{cfa_tables/fit_cov_1_full.tex}
\\hspace*{1cm}{\\parbox{0.6\\linewidth}{\\textit{Note}: All estimates are unstandardized.}}
\\end{table}
')



## ----------------------------------------------------------------------------------------
#| echo: false
#| output: asis
#| tbl-cap: Standardized Factor Loadings for Populism CFA (One Residual Covariance)
#| label: tbl-cfa-full_cov_1_std
#| 

# CFA table for the model with 1 residual covariance
         
standardizedSolution(fit_cov_1_full) %>%
  dplyr::filter(op == "=~") %>%
  select(rhs, est.std, se, pvalue) %>%
  rename(
    Item = rhs,
    `Standardized Estimate` = est.std,
    `Std. Error` = se,
    `p-value` = pvalue
  ) %>%
  mutate(Item = forcats::fct_recode(factor(Item),
                               Manichean = "manichean",
                               "General Will" = "generalwill",
                               "People Centrism" = "peoplecentrism",
                               "Anti-Elitism" = "antielitism",
                               Indivisible    = "indivisible"
  ) ) %>%
  kable(digits = 3,
        align = "lrrr",
        booktabs = TRUE,
        format = "latex") %>%
  kable_styling(font_size = 8)



## ----------------------------------------------------------------------------------------
#| echo: false
#| output: asis
#| tbl-cap: CFA with Two Residual Covariances (Unstandardized Estimates)
#| label: tbl-cfa-cov-2_unstd

# CFA table for two residual covariances
         
cat('
\\begin{table}
\\centering
\\footnotesize  
\\input{cfa_tables/fit_cov_2_full.tex}
\\hspace*{0.25cm}{\\parbox{0.6\\linewidth}{\\textit{Note}: All estimates are unstandardized.}}
\\end{table}
')



## ----------------------------------------------------------------------------------------
#| echo: false
#| output: asis
#| tbl-cap: Standardized Factor Loadings for Populism CFA (Two Residual Covariances)
#| label: tbl-cfa-full_cov_2_std

# CFA table for the model with 2 residual covariances 
         
standardizedSolution(fit_cov_2_full) %>%
  dplyr::filter(op == "=~") %>%
  select(rhs, est.std, se, pvalue) %>%
  rename(
    Item = rhs,
    `Standardized Estimate` = est.std,
    `Std. Error` = se,
    `p-value` = pvalue
  ) %>%
  mutate(Item = forcats::fct_recode(factor(Item),
                               Manichean = "manichean",
                               "General Will" = "generalwill",
                               "People Centrism" = "peoplecentrism",
                               "Anti-Elitism" = "antielitism",
                               Indivisible    = "indivisible"
  ) ) %>%
  kable(digits = 3,
        align = "lrrr",
        booktabs = TRUE,
        format = "latex") %>%
  kable_styling(font_size = 8)



## ----------------------------------------------------------------------------------------
#| echo: false
#| output: asis
#| tbl-cap: CFA with Three Residual Covariances (Unstandardized Estimates)
#| label: tbl-cfa-cov-3_unstd


# CFA table for three residual covariances
         
cat('
\\begin{table}
\\centering
\\footnotesize  
\\input{cfa_tables/fit_cov_3_full_appendix.tex}
\\hspace*{0.25cm}{\\parbox{0.6\\linewidth}{\\textit{Note}: All estimates are unstandardized.}}
\\end{table}
')



## ----------------------------------------------------------------------------------------
#| echo: false
#| output: asis
#| tbl-cap: Standardized Factor Loadings for Populism CFA (Three Residual Covariances)
#| label: tbl-cfa-full_cov_3_std

# CFA table for the model with 3 residual covariances 
         
standardizedSolution(fit_cov_3_full) %>%
  dplyr::filter(op == "=~") %>%
  select(rhs, est.std, se, pvalue) %>%
  rename(
    Item = rhs,
    `Standardized Estimate` = est.std,
    `Std. Error` = se,
    `p-value` = pvalue
  ) %>%
  mutate(Item = forcats::fct_recode(factor(Item),
                               Manichean = "manichean",
                               "General Will" = "generalwill",
                               "People Centrism" = "peoplecentrism",
                               "Anti-Elitism" = "antielitism",
                               Indivisible    = "indivisible"
  ) ) %>%
  kable(digits = 3,
        align = "lrrr",
        booktabs = TRUE,
        format = "latex") %>%
  kable_styling(font_size = 8)



## ----------------------------------------------------------------------------------------
#| echo: false
#| tbl-cap: Modification Indices (Baseline Model)
#| label: tbl-modification-indices-basic

# We create tables for the modification indices. 
# We use the models from above. 

# For the baseline model


modificationindices(fit_basic_full, sort = TRUE) %>%
  select(lhs, op, rhs, mi, epc) %>%
  kable(row.names = FALSE) %>%
  kable_classic() 



## ----------------------------------------------------------------------------------------
#| echo: false
#| tbl-cap: Modification Indices (One Residual Covariance)
#| label: tbl-modification-indices-cov-1


# We create tables for the modification indices. 
# We use the models from above. 

# For the model with one residual covariance


modificationindices(fit_cov_1_full, sort = TRUE) %>%
  select(lhs, op, rhs, mi, epc) %>%
  kable(row.names = FALSE) %>%
  kable_classic() 



## ----------------------------------------------------------------------------------------
#| echo: false
#| tbl-cap: Modification Indices with Manichean and People Centrism
#| label: tbl-modification-indices-cov-2-manichean


# We create tables for the modification indices. 
# We use the models from above. 

# For the model with two residual covariances with manichean and people centrism

modificationindices(fit_cov_2_full_manichean, sort = TRUE) %>%
  select(lhs, op, rhs, mi, epc) %>%
  kable(row.names = FALSE) %>%
  kable_classic()



## ----------------------------------------------------------------------------------------
#| echo: false
#| tbl-cap: Modification Indices (Two Residual Covariances)
#| label: tbl-modification-indices-cov-2

# We create tables for the modification indices. 
# We use the models from above. 

# For the model with two residual convariances (second version)


modificationindices(fit_cov_2_full, sort = TRUE) %>%
  select(lhs, op, rhs, mi, epc) %>%
  kable(row.names = FALSE) %>%
  kable_classic() 



## ----------------------------------------------------------------------------------------
#| echo: false
#| tbl-cap: Modification Indices (Three Residual Covariances)
#| label: tbl-modification-indices-cov-3

# We create tables for the modification indices. 
# We use the models from above. 

# For the model with three residual covariances

modificationindices(fit_cov_3_full, sort = TRUE) %>%
  select(lhs, op, rhs, mi, epc) %>%
  kable(row.names = FALSE) %>%
  kable_classic()



## ----------------------------------------------------------------------------------------
#| echo: false
#| tbl-cap: Covariances of Residuals (Three Residual Covariances)
#| label: tbl-residuals-cov-3-cov


residuals_cov_3 <- lavResiduals(fit_cov_3_full, type = "cor")

residuals_cov_3[["cov"]] %>%
  as.data.frame() %>%
  `rownames<-`(c("Manichean", 
                 "General Will", 
                 "People Centrism", 
                 "Anti-Elitism", 
                 "Indivisible")) %>%  # Directly replace the old row names with new names
  rename(Manichean = manichean,
         `General Will` = generalwill,
         `People Centrism` = peoplecentrism,
         `Anti-Elitism` = antielitism,
         Indivisible = indivisible) %>%
  mutate(across(everything(), ~ round(.x, 3))) %>%
  {
    mat <- as.matrix(.)
    mat[upper.tri(mat)] <- ""  # Replace upper triangle with blanks
    as.data.frame(mat)
  } %>%
  rownames_to_column(var = "Variable") %>%
  kable()  %>%
  kable_classic()




## ----------------------------------------------------------------------------------------
#| echo: false
#| tbl-cap: Standardized Covariances of Residuals (Three Residual Covariances)
#| label: tbl-residuals-cov-3-cov-z


residuals_cov_3[["cov.z"]] %>%
  as.data.frame() %>%
  `rownames<-`(c("Manichean", 
                 "General Will", 
                 "People Centrism", 
                 "Anti-Elitism", 
                 "Indivisible")) %>%  
  rename(Manichean = manichean,
         `General Will` = generalwill,
         `People Centrism` = peoplecentrism,
         `Anti-Elitism` = antielitism,
         Indivisible = indivisible) %>%
  mutate(across(everything(), ~ round(.x, 3))) %>%
  {
    mat <- as.matrix(.)
    mat[upper.tri(mat)] <- ""  # Replace upper triangle with blanks
    as.data.frame(mat)
  } %>%
  rownames_to_column(var = "Variable") %>%
  kable()  %>%
  kable_classic()




## ----------------------------------------------------------------------------------------
#| echo: false
#| results: asis
#| tbl-cap: CFA Baseline Model (Unstandardized Estimates)
#| label: tbl-cfa-multi-group-basic


# CFA multigroup baseline model

cat('
\\begin{table}
\\centering
\\footnotesize  
\\input{cfa_tables/fit_mgm_basic.tex}
\\hspace*{.25cm}{\\parbox{0.6\\linewidth}{\\textit{Note}: All estimates are unstandardized.}}
\\end{table}
')




## ----------------------------------------------------------------------------------------
#| echo: false
#| results: asis
#| tbl-cap: CFA with One Residual Covariance (Unstandardized Estimates)
#| label: tbl-cfa-multi-group-cov-1


# CFA multigroup model with one residual covariance

cat('
\\begin{table}
\\centering
\\footnotesize  
\\input{cfa_tables/fit_mgm_cov_1.tex}
\\hspace*{-0.8cm}{\\parbox{0.6\\linewidth}{\\textit{Note}: All estimates are unstandardized.}}
\\end{table}
')




## ----------------------------------------------------------------------------------------
#| echo: false
#| results: asis
#| tbl-cap: CFA with Two Residual Covariances (Unstandardized Estimates)
#| label: tbl-cfa-multi-group-cov-2


# CFA multigroup model with two residual covariances

cat('
\\begin{table}
\\centering
\\footnotesize  
\\input{cfa_tables/fit_mgm_cov_2.tex}
\\hspace*{-1.5cm}{\\parbox{0.6\\linewidth}{\\textit{Note}: All estimates are unstandardized.}}
\\end{table}
')



## ----------------------------------------------------------------------------------------
#| echo: false
#| results: asis
#| tbl-cap: CFA with Three Residual Covariances (Unstandardized Estimates)
#| label: tbl-cfa-multi-group-cov-3


# CFA multigroup model with three residual covariances

cat('
\\begin{table}
\\centering
\\footnotesize  
\\input{cfa_tables/fit_mgm_cov_3_appendix.tex}
\\hspace*{-1.5cm}{\\parbox{0.6\\linewidth}{\\textit{Note}: All estimates are unstandardized.}}
\\end{table}
')




## ----------------------------------------------------------------------------------------
#| echo: false
#| output: asis
#| label: tbl-invariance-all-models
#| tbl-cap: Invariance Test (All Models)

# We create a table for the invariance models. 
# We ran the models above so as to avoid issues of colliding variables. 

# We convert the data to a LaTeX-friendly table.

combined_lavTestLRT_results_xtable <- xtable(combined_lavTestLRT_results)

# We print the LaTeX table.

print(
  combined_lavTestLRT_results_xtable, 
  type = "latex", 
  booktabs = TRUE, 
  include.rownames = FALSE,
  comment = FALSE,
  floating = FALSE
)




## ----------------------------------------------------------------------------------------
#| include: false

# We run the IRT for Wave 1.

df_irt_five_wave_1 <- df_integrated %>%
  dplyr::filter(wave == "Wave 1 - 2018") %>%
  select(manichean,                
         generalwill,       
         peoplecentrism,    
         antielitism,
         indivisible)


df_irt_five_wave_1 <- na.omit(df_irt_five_wave_1)

# Since our variable is not an integer we round the values off.

df_irt_five_wave_1 <- df_irt_five_wave_1 %>%
  mutate(across(everything(), round))  


# Fit the model using the GRM for polytomous (ordered) items

fit_5_wave_1 <- mirt(data = df_irt_five_wave_1, model = model_graded, itemtype = "graded")



## ----------------------------------------------------------------------------------------
#| echo: false
#| fig-width: 14
#| fig-height: 8
#| fig-cap: "Item Information Plots Wave 1"
#| label: item-information-wave-1

# We apply the function from above for the Item Information for Wave 1

theta_values_wave_1 <- seq(-3, 3, by = 0.1)
item_names_wave_1 <- colnames(extract.mirt(fit_5_wave_1, 'data'))
info_results_wave_1 <- extract_info_data(fit_5_wave_1, theta_values_wave_1, item_names_wave_1)
info_results_wave_1$Item <- factor(info_results_wave_1$Item, levels = item_names_wave_1)

# We plot information data.

info_results_wave_1 %>%
  mutate(Item = forcats::fct_recode(Item, !!!populist_items),  
         Item = forcats::fct_relevel(Item, !!!populism_items_order)) %>%
  ggplot(aes(x = Theta, y = Information, color = Item)) +
  geom_line(linewidth = 1) +
  facet_wrap(~ Item) +
  theme_bw() +
  labs(x = expression(theta),
       y = "Information") +
  theme(plot.title = element_text(hjust = 0.5), 
        legend.position = "bottom")



## ----------------------------------------------------------------------------------------
#| echo: false
#| eval: true
#| include: false

# We run the IRT for Wave 2.

df_irt_five_wave_2 <- df_integrated %>%
  dplyr::filter(wave == "Wave 2 - 2023") %>%
  select(manichean,                
         generalwill,       
         peoplecentrism,    
         antielitism,
         indivisible)


df_irt_five_wave_2 <- na.omit(df_irt_five_wave_2)

# Since our variable is not an integer we round the the values off to the nearest integer. 

df_irt_five_wave_2 <- df_irt_five_wave_2 %>%
  mutate(across(everything(), round)) %>%  
  mutate(across(everything(), ~ . - 1)) 

# We fit the model using the GRM for polytomous (ordered) items.

fit_5_wave_2 <- mirt(data = df_irt_five_wave_2, model = model_graded, itemtype = "graded")




## ----------------------------------------------------------------------------------------
#| echo: false
#| fig-width: 14
#| fig-height: 8
#| label: item-information-wave-2
#| fig-cap: "Item Information Plots Wave 2"

# We apply the function from above for the Item Information for Wave 2. 

# We process full dataset for the Item Information plots.
# We use the fitted IRT model (fit_5_wave_2).

theta_values_wave_2 <- seq(-3, 3, by = 0.1)
item_names_wave_2 <- colnames(extract.mirt(fit_5_wave_2, 'data'))
info_results_wave_2 <- extract_info_data(fit_5_wave_2, theta_values_wave_2, item_names_wave_2)
info_results_wave_2$Item <- factor(info_results_wave_2$Item, levels = item_names_wave_2)

# We plot the information data.
info_results_wave_2 %>%
  mutate(Item = forcats::fct_recode(Item, !!!populist_items),  # Recode items for Wave 2 dataset
         Item = forcats::fct_relevel(Item, !!!populism_items_order)) %>%
  ggplot(aes(x = Theta, y = Information, color = Item)) +
  geom_line(linewidth = 1) +
  facet_wrap(~ Item) +
  theme_bw() +
  labs(x = expression(theta),
       y = "Information") +
  theme(plot.title = element_text(hjust = 0.5), 
       legend.position = "bottom")



## ----------------------------------------------------------------------------------------
#| echo: false
#| fig-width: 14
#| fig-height: 8
#| fig-cap: "Item Proability Function (Wave 1)"
#| label: ipf-wave-1


# We apply the function from above for the Item Probability Function from wave 1. 

# We process full dataset for Item Probability Function. 
# We use the fitted IRT model (fit_5_wave_1).

theta_values_wave_1 <- seq(-3, 3, by = 0.1)
item_names_wave_1 <- colnames(extract.mirt(fit_5_wave_1, 'data'))
results_wave_1 <- extract_prob_data(fit_5_wave_1, theta_values_wave_1, item_names_wave_1)

# Convert item and category to factors for better labeling in the plot
results_wave_1$Item <- factor(results_wave_1$Item, levels = item_names_wave_1)
results_wave_1$Category <- factor(results_wave_1$Category, levels = unique(results_wave_1$Category))

# We plot using ggplot2 with the custom palette and linetypes for Wave 1.
results_wave_1 %>%
  mutate(Item = forcats::fct_recode(Item, !!!populist_items),  
         Item = forcats::fct_relevel(Item, !!!populism_items_order)) %>%
  ggplot(aes(x = Theta, y = Probability, color = Category, linetype = Category)) +
  geom_line(linewidth = 1) +
  facet_wrap(~ Item, scales = "free_y") +
  theme_bw() +
  labs(x = expression(theta),
       y = expression(P(theta))) +
  scale_color_manual(values = category_colors) +
  scale_linetype_manual(values = lty_vector) +  
  theme(plot.title = element_text(hjust = 0.5), 
        legend.position = "bottom",
        legend.direction = "horizontal",
        legend.box = "horizontal") +  
  guides(color = guide_legend(title = "Items", nrow = 1), 
         linetype = guide_legend(title = "Items", nrow = 1))



## ----------------------------------------------------------------------------------------
#| echo: false
#| fig-width: 14
#| fig-height: 8
#| fig-cap: "Item Proability Function (Wave 2)"
#| label: ipf-full-wave-2

# We apply the function from above for the Item Probability Function from wave 2. 

# We define a sequence of theta values for Wave 2.
theta_values_wave_2 <- seq(-3, 3, by = 0.1)

# We get the original item names from the Wave 2 model.
item_names_wave_2 <- colnames(extract.mirt(fit_5_wave_2, 'data'))

# We use the function to extract the probability data for Wave 2.
results_wave_2 <- extract_prob_data(fit_5_wave_2, theta_values_wave_2, item_names_wave_2)

# We convert item and category to factors for better labeling in the plot.
results_wave_2$Item <- factor(results_wave_2$Item, levels = item_names_wave_2)
results_wave_2$Category <- factor(results_wave_2$Category, levels = unique(results_wave_2$Category))

# We plot using ggplot2 with the custom palette and linetypes for Wave 2.
results_wave_2 %>%
  mutate(Item = forcats::fct_recode(Item, !!!populist_items),  
         Item = forcats::fct_relevel(Item, !!!populism_items_order)) %>%
  ggplot(aes(x = Theta, y = Probability, color = Category, linetype = Category)) +
  geom_line(linewidth = 1) +
  facet_wrap(~ Item, scales = "free_y") +
  theme_bw() +
  labs(x = expression(theta),
       y = expression(P(theta))) +
  scale_color_manual(values = category_colors) +
  scale_linetype_manual(values = lty_vector) +  
  theme(plot.title = element_text(hjust = 0.5), 
        legend.position = "bottom",
        legend.direction = "horizontal",
        legend.box = "horizontal") +  
  guides(color = guide_legend(title = "Items", nrow = 1), 
         linetype = guide_legend(title = "Items", nrow = 1))




## ----------------------------------------------------------------------------------------
#| echo: false
#| output: asis
#| tbl-cap: Correlation Matrix for Populist Items
#| label: tbl-correlations-populism

# We run a correlation matrix for the populism items using the modelsummary package.

populism_correlation <- df_integrated %>%
  select(Manichean = manichean, 
         Indivisible = indivisible, 
         `General Will` = generalwill, 
         `People Centrism` = peoplecentrism, 
         `Anti-Elitism` = antielitism) 

if (requireNamespace("correlation", quietly = TRUE)) {
  co <- correlation::correlation(populism_correlation)
  datasummary_correlation(co)
  
  # add stars to easycorrelation objects
  datasummary_correlation(co, stars = TRUE,
                          output = 'kableExtra')
}



## ----------------------------------------------------------------------------------------
#| echo: false
#| output: asis
#| tbl-cap: Correlation Matrix for Attaching Ideologies
#| label: tbl-correlations-attaching-ideologies

# We run a correlation matrix for the attaching ideologies using the modelsummary package.


correlation_attaching_ideologies <- df_integrated %>%
  select(Populism = populism_cfa_resc,
         `Left-Right Economy` = lrecon,
         Nativism = nativism,
         `Lifestyle (rev)` = lifestyle_rev,
         `Law and Order` = laworder,
         Authoritarianism = authoritarian) 

if (requireNamespace("correlation", quietly = TRUE)) {
  co <- correlation::correlation(correlation_attaching_ideologies)
  datasummary_correlation(co)
  
  datasummary_correlation(co, stars = TRUE,
                          output = 'kableExtra') %>%
    kable_classic() %>% 
    kable_styling(font_size = 8)
}



## ----------------------------------------------------------------------------------------
#| echo: false
#| fig-height: 8
#| fig-width: 12
#| tbl-cap: Difference per Party Family for Populism (Median, Mean, Standard Deviation)
#| label: tbl-difference-family-table-populism

# We create tables for populism for the median, mean and standard deviation.
# We do this since the plots are can be difficult to read in terms of actual values. 

summary_table_populism <- df_parties_both_waves %>%
  dplyr::filter(family_label != "Confessional") %>%
  dplyr::filter(!is.na(populism_cfa_both_resc)) %>%  
  group_by(family_label, poppa_id, party_short) %>%
  arrange(poppa_id, wave) %>%
  reframe(diff_populism = diff(populism_cfa_both_resc)) %>%
  group_by(family_label) %>%
  dplyr::summarize(
    median_diff_populism = median(diff_populism, na.rm = TRUE),
    mean_diff_populism = mean(diff_populism, na.rm = TRUE),
    sd_diff_populism = sd(diff_populism, na.rm = TRUE)
  ) %>%
  ungroup() 


summary_table_populism %>%
  kable(
    format = "latex", 
    booktabs = TRUE, 
    linesep = "", 
    digits = 2, 
    col.names = c("Party Family", "Median Diff Populism", "Mean Diff Populism", "SD Diff Populism")
  ) %>%
  add_header_above(c(" " = 1, "Populism Difference Statistics" = 3)) %>%
  kable_classic() %>%
  kable_styling(latex_options = c("striped", "scale_down"), 
                full_width = FALSE, 
                position = "center", 
                font_size = 10) %>%
  column_spec(2:4, width = "5cm")



## ----------------------------------------------------------------------------------------
#| echo: false
#| tbl-cap: Populist Party Characteristics (With Populism Mean)
#| label: tbl-pop-char-ols-mean

# We run the regression models for position with the populism mean variable. 

models_pop_char_mean <- list(
  "Model 1"  = lm(populism_mean ~ nativism + country + wave, data = df_integrated),
  "Model 2"  = lm(populism_mean ~ lrecon + country + wave, data = df_integrated),
  "Model 3"  = lm(populism_mean ~ authoritarian + country + wave, data = df_integrated),
  "Model 4"  = lm(populism_mean ~ nativism + lrecon + country + wave, data = df_integrated)
  
)


modelsummary::modelsummary(models_pop_char_mean, 
                           estimate = "{estimate}{stars}",
                           output = 'kableExtra',
                           coef_map = cm_char,
                           gof_map = c("nobs", "r.squared")) %>%
  kable_styling(font_size = 8) %>%
  kableExtra::add_footnote(
    label = c("With country fixed effects", 
              "Standard errors are shown in parentheses",
              "+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001"),
    notation = "none"
  )



## ----------------------------------------------------------------------------------------
#| echo: false
#| tbl-cap: Interaction with Wave (With Populism Mean)
#| label: tbl-interaction-wave-mean

# We run the regression models for the interactions with wave with the populism mean variable. 

models_interaction_wave_mean <- list(
  "Model 1 (FD)" = lm(populism_mean ~ wave + country, data = df_integrated),
  "Model 2 (FD)" = lm(populism_mean ~ lrecon_centered + wave*nativism_centered + country, data = df_integrated),
  "Model 3 (FD)" = lm(populism_mean ~ nativism_centered + wave*lrecon_centered + country, data = df_integrated),
  "Model 4 (FD)" = lm(populism_mean ~ lrecon_centered + wave*authoritarian_centered + country, data = df_integrated),
  "Model 5 (DSP)" = lm(populism_mean ~ wave + country, data = df_parties_both_waves),
  "Model 6 (DSP)" = lm(populism_mean ~ lrecon_centered + wave*nativism_centered + country, data = df_parties_both_waves),
  "Model 7 (DSP)" = lm(populism_mean ~ lrecon_centered + wave*authoritarian_centered + country, data = df_parties_both_waves)
)

modelsummary::modelsummary(models_interaction_wave_mean, 
                          estimate = "{estimate}{stars}",                           
                          output = 'kableExtra',
                          coef_map = cm_wave_centered,
                          gof_map = c("nobs", "r.squared")) %>%
  kable_styling(full_width = FALSE, font_size = 8) %>%
  kable_styling(font_size = 8) %>%
  column_spec(2:7, width = "5em") %>%  # Adjust the width of the first column
  kableExtra::add_footnote(
    label = c("With country fixed effects; FD: Full dataset; DSP: Dataset with shared parties", 
              "Standard errors are shown in parentheses",
              "+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001"),
    notation = "none"
  )



## ----------------------------------------------------------------------------------------
#| echo: false
#| message: false
#| tbl-cap: Number of Experts Per Country
#| label: tbl-experts-per-country

# Country Level

# We import the expert-level data. This dataset includes party IDs and the number of experts per party, per item.
# The data is structured at the expert level (rows) with multiple rows per party.
# The number of experts can vary per party and per item.
# We filter out those row that have all missing values.

expert_data_2018 <- haven::read_dta(here("data/all_expert.dta")) %>%
  select(poppa_id = party_id,
         ends_with("_nr")) 

expert_data_2018$poppa_id <- as.numeric(expert_data_2018$poppa_id)

# We read the excel file in with the up to date information, i.e. party names etc. 

poppa_excel_2018 <- readxl::read_excel(here("data/POPPA_List_of_parties_2018.xlsx"), sheet = 2) %>%
  janitor::clean_names() %>%
  select(country,
         poppa_id = party_id,
         party_short = abbreviation,
         party_name_english)


# We join the Excel file with the expert-level data to update party information (e.g., names).
# We set any values with fewer than 4 experts to NA to exclude them from analysis.

expert_data_2018_joined <- full_join(expert_data_2018, poppa_excel_2018, by = join_by(poppa_id)) %>%
  select(country,
         poppa_id,
         party_short,
         party_name_english,
         everything()) %>%
  mutate(across(ends_with("_nr"), ~ ifelse(. < 4, NA, .))) 


# We make country a factor in preparation for joining with the 2023 dataset. 

expert_data_2018_joined$country <- forcats::as_factor(expert_data_2018_joined$country)


# We take the first value for each party per item. 
# The values per party per item are the same.
# In essence we are collapsing the data.

expert_data_2018_joined_collapsed <- expert_data_2018_joined %>%
  group_by(country, poppa_id, party_short) %>%
  summarise(across(ends_with("_nr"), first))

# We calculate the mean, min, and max number of experts.
# Since we are using rowwise calculations, these values are calculated per party, across all items.
# Afterward, we summarize the mean, min, and max at the country level by averaging the party-level results.

summary_table_2018_country <- expert_data_2018_joined_collapsed %>%
  rowwise() %>%  
  dplyr::filter(!all(is.na(c_across(ends_with("_nr"))))) %>%  
  mutate(
    mean_experts = round(mean(c_across(ends_with("_nr")), na.rm = TRUE), 2),   
    min_experts = min(c_across(ends_with("_nr")), na.rm = TRUE),              
    max_experts = max(c_across(ends_with("_nr")), na.rm = TRUE)               
  ) %>%
  ungroup() %>%  # Remove rowwise() context to group by country
  group_by(country) %>%
  summarise(
    mean_experts = round(mean(mean_experts, na.rm = TRUE), 2),  
    min_experts = min(min_experts, na.rm = TRUE),               
    max_experts = max(max_experts, na.rm = TRUE)                
  ) %>%
  mutate(wave = "Wave 1 - 2018")  # Add the wave column


# We import the expert-level data for 2023. This dataset includes party IDs and the number of experts per party, per item.
# The data is structured at the expert level (rows) with multiple rows per party.
# The number of experts can vary per party and per item.

expert_data_2023 <- readRDS(here("data/poppa2_expert.rds")) %>%
  janitor::clean_names() %>%
  select(country,
         poppa_id,
         ends_with("_n_experts")) %>% 
  dplyr::filter(!all(is.na(c_across(ends_with("_n_experts"))))) 


# We read the excel file in with the up to date information, i.e. party names etc. 

poppa_excel_2022 <- readxl::read_excel(here("data/POPPA_List_of_parties_2022.xlsx"))  %>%
  janitor::clean_names() %>%
  select(poppa_id = party_id,
         party_short = abbreviation,
         party_name_english)


# We join the Excel file with the expert-level data to update party information (e.g., names).
# We set any values with fewer than 4 experts to NA to exclude them from analysis.

expert_data_2023_joined <- left_join(expert_data_2023, poppa_excel_2022, by = join_by(poppa_id)) %>%
  select(country,
         poppa_id,
         party_short,
         party_name_english,
         everything())  %>%
  mutate(across(ends_with("_n_experts"), ~ ifelse(. < 4, NA, .)))  # Replace values with NA if experts < 4


# We fix the following country names to match the data from 2018. 

expert_data_2023_joined <- expert_data_2023_joined %>%
  mutate(country = forcats::as_factor(country),
         country = forcats::fct_recode(country,
                                       "Czech Republic" = "Czech",
                                       "Belgium - Flanders" = "Flanders",
                                       "Belgium - Wallonia" = "Wallonie"))



# We take the first value for each party per item. 
# The values per party per item are the same.
# In essence we are collapsing the data.

expert_data_2023_joined_collapsed <- expert_data_2023_joined %>%
  group_by(country, poppa_id, party_short) %>%
  summarise(across(ends_with("_n_experts"), first))

# We calculate the mean, min, and max number of experts.
# Since we are using rowwise calculations, these values are calculated per party, across all items.
# Afterward, we summarize the mean, min, and max at the country level by averaging the party-level results.

summary_table_2023_country <- expert_data_2023_joined %>%
  rowwise() %>%  
  dplyr::filter(!all(is.na(c_across(ends_with("_n_experts"))))) %>%  
  mutate(
    mean_experts = round(mean(c_across(ends_with("_n_experts")), na.rm = TRUE), 2),  
    min_experts = min(c_across(ends_with("_n_experts")), na.rm = TRUE),              
    max_experts = max(c_across(ends_with("_n_experts")), na.rm = TRUE)               
  ) %>%
  ungroup() %>%  
  group_by(country) %>%
  summarise(
    mean_experts = round(mean(mean_experts, na.rm = TRUE), 2),  
    min_experts = min(min_experts, na.rm = TRUE),               
    max_experts = max(max_experts, na.rm = TRUE)                
  ) %>%
  mutate(wave = "Wave 2 - 2023")  


# We combine the 2018 and 2023 datasets for a country-level summary of expert evaluations across both waves.

combined_data_country <- dplyr::bind_rows(summary_table_2018_country, summary_table_2023_country)

# We calculate the overall mean, min, and max.

overall_row_country <- combined_data_country %>%
  group_by(wave) %>%
  dplyr::summarize(
    mean_experts = round(mean(mean_experts, na.rm = TRUE), 2),
    min_experts = min(min_experts, na.rm = TRUE),
    max_experts = max(max_experts, na.rm = TRUE),
  ) %>%
  mutate(country = "Overall")

# We relocate the 'country' column to ensure the correct order.

overall_row_country <- overall_row_country %>%
  dplyr::relocate(country, .before = mean_experts)

# We bind the new row to the dataset.

combined_data_country <- bind_rows(combined_data_country, overall_row_country)

# We pivot the dataset wider based on the 'wave' column.

combined_data_wide_country <- combined_data_country %>%
  pivot_wider(names_from = wave,
              values_from = c(mean_experts, min_experts, max_experts),
              names_glue = "{.value}_{wave}"
  ) %>% 
  select(
    Country = country,
    `Mean Experts Wave 1` = `mean_experts_Wave 1 - 2018`,
    `Min Experts Wave 1` = `min_experts_Wave 1 - 2018`,
    `Max Experts Wave 1` = `max_experts_Wave 1 - 2018`,
    `Mean Experts Wave 2` =  `mean_experts_Wave 2 - 2023`,
    `Min Experts Wave 2` = `min_experts_Wave 2 - 2023`,
    `Max Experts Wave 2` = `max_experts_Wave 2 - 2023`
  ) %>%
  mutate(Country = as.character(Country)) %>%  
  mutate(flag = if_else(Country == "Overall", 1, 0)) %>%  
  arrange(flag, Country) %>%                            
  select(-flag)    


# We print the table.

combined_data_wide_country %>%
  kable(
    format = "latex",  
    booktabs = TRUE,
    linesep = "") %>% 
  kable_classic() %>% 
  kable_styling(
    full_width = FALSE, 
    position = "center", 
    bootstrap_options = c("striped", "hover")
  ) %>%
  kable_styling(font_size = 8) %>%
  column_spec(2:7, width = "5em") %>% 
  row_spec(nrow(combined_data_wide_country), bold = TRUE) 


